import geopandas as gpd
import tobler
import pyinterpolate
import numpy as np
from libpysal import graph
from sklearn import neighbors
from scipy import interpolateSpatial interpolation
The contents will appear later
This section will contain hands-on material that will appear later.
Area interpolation
simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")
simd.head(2)| DataZone | DZName | LAName | SAPE2017 | WAPE2017 | Rankv2 | Quintilev2 | Decilev2 | Vigintilv2 | Percentv2 | ... | CrimeRate | CrimeRank | HouseNumOC | HouseNumNC | HouseOCrat | HouseNCrat | HouseRank | Shape_Leng | Shape_Area | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | S01008417 | Balerno and Bonnington Village - 01 | City of Edinburgh | 708 | 397 | 5537 | 4 | 8 | 16 | 80 | ... | 86 | 5392.0 | 17 | 8 | 2% | 1% | 6350.0 | 20191.721420 | 1.029993e+07 | POLYGON ((315157.369 666212.846, 315173.727 66... |
| 1 | S01008418 | Balerno and Bonnington Village - 02 | City of Edinburgh | 691 | 378 | 6119 | 5 | 9 | 18 | 88 | ... | 103 | 5063.0 | 7 | 10 | 1% | 1% | 6650.0 | 25944.861787 | 2.357050e+07 | POLYGON ((317816.000 666579.000, 318243.000 66... |
2 rows × 52 columns
simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
grid_8 = tobler.util.h3fy(simd, resolution=8)
grid_8.head(2)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
| geometry | |
|---|---|
| hex_id | |
| 881972393dfffff | POLYGON ((328193.671 671699.739, 327730.157 67... |
| 8819727743fffff | POLYGON ((318448.651 677311.606, 317984.690 67... |
m = simd.boundary.explore(tiles="CartoDB Positron")
grid_8.boundary.explore(m=m, color="red")Make this Notebook Trusted to load map: File -> Trust Notebook
overlay
interpolated = tobler.area_weighted.area_interpolate(
simd,
grid_8,
extensive_variables=["EmpNumDep", "IncNumDep"],
intensive_variables=["EmpRate", "IncRate"],
)interpolated.explore("EmpRate", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Point interpolation
airbnb = gpd.read_file("data/edinburgh_airbnb_2023.gpkg")
airbnb.head()| id | bedrooms | property_type | price | geometry | |
|---|---|---|---|---|---|
| 0 | 15420 | 1.0 | Entire rental unit | $126.00 | POINT (325921.137 674478.931) |
| 1 | 790170 | 2.0 | Entire condo | $269.00 | POINT (325976.360 677655.252) |
| 2 | 24288 | 2.0 | Entire loft | $95.00 | POINT (326069.186 673072.913) |
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) |
| 4 | 822829 | 3.0 | Entire rental unit | $361.00 | POINT (325691.831 674328.127) |
airbnb.price.head()0 $126.00
1 $269.00
2 $95.00
3 $172.00
4 $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)two_bed_homes = airbnb[
(airbnb["bedrooms"] == 2)
& (airbnb["property_type"] == "Entire rental unit")
& (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()| id | bedrooms | property_type | price | geometry | price_float | |
|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 |
two_bed_homes.geometry.duplicated().any()True
two_bed_homes = two_bed_homes.drop_duplicates("geometry")two_bed_homes.explore("price_float", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
from libpysal import graph
knn = graph.Graph.build_kernel(two_bed_homes, k=10).transform("r")
two_bed_homes["price_lag"] = knn.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Nearest
grid_10 = tobler.util.h3fy(simd, resolution=10)
len(grid_10)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
20162
grid_coordinates = grid_10.centroid.get_coordinates()
grid_coordinates.head()| x | y | |
|---|---|---|
| hex_id | ||
| 8a1972766707fff | 321343.980521 | 675606.747254 |
| 8a197275cdb7fff | 315877.854512 | 668791.639689 |
| 8a197239ad17fff | 328805.423248 | 667597.536876 |
| 8a1972742537fff | 316429.948928 | 670257.054100 |
| 8a1972740197fff | 319126.996682 | 670074.407673 |
coords = two_bed_homes.get_coordinates()
coords.head()| x | y | |
|---|---|---|
| 3 | 326748.645636 | 674001.683211 |
| 5 | 324950.723888 | 673875.598033 |
| 6 | 326493.725178 | 672853.903917 |
| 10 | 326597.123684 | 673869.551295 |
| 17 | 325393.807222 | 674177.408961 |
nearest = interpolate.griddata(
coords,
two_bed_homes.price_lag,
grid_coordinates,
method="nearest",
)
nearestarray([189. , 181. , 86. , ..., 188.70398583,
188.70398583, 84. ])
grid_10["nearest"] = nearest_ = grid_10.plot('nearest', legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

ax = grid_10.plot('nearest', legend=True)
two_bed_homes.plot(ax=ax, color="red", markersize=1)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):
<Axes: >
caption here

K-Nearest neighbours regression
Uniform
interpolation_uniform = neighbors.KNeighborsRegressor(n_neighbors=10, weights="uniform")interpolation_uniform.fit(
coords, two_bed_homes.price_lag
)KNeighborsRegressor(n_neighbors=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor(n_neighbors=10)
price_on_grid = interpolation_uniform.predict(grid_coordinates)
price_on_gridarray([126.85397127, 100.56773529, 145.41599385, ..., 120.36697169,
120.36697169, 120.36697169])
grid_10["knn_uniform"] = price_on_grid_ = grid_10.plot("knn_uniform", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

Distance-weighted
interpolation_distance = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")interpolation_distance.fit(
coords, two_bed_homes.price_lag
)KNeighborsRegressor(n_neighbors=10, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor(n_neighbors=10, weights='distance')
grid_10["knn_distance"] = interpolation_distance.predict(grid_coordinates)_ = grid_10.plot("knn_distance", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

Distance band regression
interpolation_radius = neighbors.RadiusNeighborsRegressor(radius=1000, weights="distance")
interpolation_radius.fit(
coords, two_bed_homes.price_lag
)RadiusNeighborsRegressor(radius=1000, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RadiusNeighborsRegressor(radius=1000, weights='distance')
grid_10["radius"] = interpolation_radius.predict(grid_coordinates)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/sklearn/neighbors/_regression.py:500: UserWarning: One or more samples have no neighbors within specified radius; predicting NaN.
warnings.warn(empty_warning_msg)
_ = grid_10.plot("radius", legend=True, missing_kwds={'color': 'lightgrey'})/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

Ordinary Kriging
input_data = np.hstack([coords, two_bed_homes.price_lag.values.reshape(-1, 1)])
input_dataarray([[3.26748646e+05, 6.74001683e+05, 2.07033397e+02],
[3.24950724e+05, 6.73875598e+05, 1.54805935e+02],
[3.26493725e+05, 6.72853904e+05, 1.43865293e+02],
...,
[3.28513265e+05, 6.74048892e+05, 1.06875409e+02],
[3.26840903e+05, 6.74767224e+05, 1.68848108e+02],
[3.25415664e+05, 6.73345158e+05, 2.15847334e+02]])
step_radius = 100 # meters
max_range = 5000 # meters
exp_semivar = pyinterpolate.build_experimental_variogram(
input_array=input_data,
step_size=step_radius,
max_range=max_range,
)exp_semivar.plot()
semivar = pyinterpolate.build_theoretical_variogram(
experimental_variogram=exp_semivar,
model_name='linear',
sill=exp_semivar.variance,
rang=5000
)semivar.plot()
ordinary_kriging = pyinterpolate.kriging(
observations=input_data,
theoretical_model=semivar,
points=grid_coordinates.values,
no_neighbors=10,
how="ok",
show_progress_bar=False,
)grid_10["ordinary_kriging"] = ordinary_kriging[:, 0]_ = grid_10.plot("ordinary_kriging", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

grid_10["variance_error"] = ordinary_kriging[:, 1]
_ = grid_10.plot("variance_error", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

Use larger distance
semivar_larger = pyinterpolate.build_theoretical_variogram(
experimental_variogram=exp_semivar,
model_name='linear',
sill=exp_semivar.variance,
rang=15000,
)ordinary_kriging_l = pyinterpolate.kriging(
observations=input_data,
theoretical_model=semivar_larger,
points=grid_coordinates.values,
no_neighbors=10,
how="ok",
show_progress_bar=False,
)grid_10["ok_larger"] = ordinary_kriging_l[:, 0]_ = grid_10.plot("ok_larger", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):

grid_10["variance_error_l"] = ordinary_kriging_l[:, 1]
_ = grid_10.plot("variance_error_l", legend=True)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
if pd.api.types.is_categorical_dtype(values.dtype):
